Importing the important libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline
Reading the CSV file
data = pd.read_csv("D:\Rahul\Internship\dataset\orders_2020_2021_DataSet_Updated.csv")
Showing the Top 5 rows of dataset
data.head()
| Order # | Fulfillment Date and Time Stamp | Currency | Subtotal | Shipping Method | Shipping Cost | Tax Method | Taxes | Total | Coupon Code | ... | Tracking # | Special Instructions | LineItem Name | LineItem SKU | LineItem Options | LineItem Add-ons | LineItem Qty | LineItem Sale Price | Download Status | LineItem Type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | Ships Free | 0 | NaN | 0.0 | 2299 | NaN | ... | NaN | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | PL0093416 | NaN | NaN | 1 | 2299 | NaN | physical |
| 1 | R472890631 | NaN | INR | 2299 | Ships Free | 0 | NaN | 0.0 | 2299 | NaN | ... | NaN | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | PL0093416 | NaN | NaN | 1 | 2299 | NaN | physical |
| 2 | R004476488 | NaN | INR | 2299 | Ships Free | 0 | NaN | 0.0 | 2299 | NaN | ... | NaN | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | PL0093416 | NaN | NaN | 1 | 2299 | NaN | physical |
| 3 | R526038353 | NaN | INR | 2299 | Ships Free | 0 | NaN | 0.0 | 2299 | NaN | ... | NaN | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | PL0093416 | NaN | NaN | 1 | 2299 | NaN | physical |
| 4 | R658530771 | NaN | INR | 349 | Ships Free | 0 | NaN | 0.0 | 349 | NaN | ... | NaN | NaN | Barbie Doll (pink) | PL00098 | NaN | NaN | 1 | 349 | NaN | physical |
5 rows × 38 columns
data.columns
Index(['Order #', 'Fulfillment Date and Time Stamp', 'Currency', 'Subtotal',
'Shipping Method', 'Shipping Cost', 'Tax Method', 'Taxes', 'Total',
'Coupon Code', 'Coupon Code Name', ' ', 'Billing Name',
'Billing Country', 'Billing Street Address', 'Billing Street Address 2',
'Billing City', 'Billing State', 'Billing Zip', 'Shipping Name',
'Shipping Country', 'Shipping Street Address',
'Shipping Street Address 2', 'Shipping City', 'Shipping State',
'Shipping Zip', 'Gift Cards', 'Payment Method', 'Tracking #',
'Special Instructions', 'LineItem Name', 'LineItem SKU',
'LineItem Options', 'LineItem Add-ons', 'LineItem Qty',
'LineItem Sale Price', 'Download Status', 'LineItem Type'],
dtype='object')
Gathering the Columns that are needed
data = data[['Order #', 'Fulfillment Date and Time Stamp', 'Currency', 'Total', 'Shipping Country',
'Shipping City', 'Shipping State', 'Payment Method', 'LineItem Name', 'LineItem Qty']]
data.head()
| Order # | Fulfillment Date and Time Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | LineItem Name | LineItem Qty | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
Changing the column name to the standard need
data.rename(columns={'Order #':'Order No', 'Fulfillment Date and Time Stamp':'DateTime Stamp', 'LineItem Name':'Product Name',
'LineItem Qty':'Quantity'}, inplace=True)
data.columns
Index(['Order No', 'DateTime Stamp', 'Currency', 'Total', 'Shipping Country',
'Shipping City', 'Shipping State', 'Payment Method', 'Product Name',
'Quantity'],
dtype='object')
data.shape
(3039, 10)
Handling the Duplicate records in the dataset
data.duplicated().sum()
3
data.drop_duplicates()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3034 | R243395005 | NaN | INR | 50 | IND | Gccch | IN-AN | NaN | Ear Wired Earphones With Mic White | 1 |
| 3035 | R607209508 | NaN | INR | 500 | IND | BERHAMPUR | IN-OR | NaN | I Kall K-66 White | 1 |
| 3036 | R140316623 | NaN | INR | 4000 | IND | berhampur | IN-OR | NaN | Electric Steam Cooker Maestro MC1 | 2 |
| 3037 | R192037084 | NaN | INR | 2000 | IND | sdas | IN-AN | NaN | Electric Steam Cooker Maestro MC1 | 1 |
| 3038 | R414254148 | 28-09-2016 19:05:30 +0530 | INR | 2000 | IND | sdas | IN-AN | Offline Payment ₹2,000.00 | Electric Steam Cooker Maestro MC1 | 1 |
3036 rows × 10 columns
Result : After dropping 3 duplicate values now there is 3036 records in dataset.
Checking the null values in each column
data.isnull().sum()
Order No 0 DateTime Stamp 2801 Currency 0 Total 0 Shipping Country 0 Shipping City 19 Shipping State 22 Payment Method 2780 Product Name 0 Quantity 0 dtype: int64
Checking the percentage(%) of missing values in columns
data.isnull().mean()
Order No 0.000000 DateTime Stamp 0.921685 Currency 0.000000 Total 0.000000 Shipping Country 0.000000 Shipping City 0.006252 Shipping State 0.007239 Payment Method 0.914775 Product Name 0.000000 Quantity 0.000000 dtype: float64
Checking the number of unique value in each column
data.nunique()
Order No 2971 DateTime Stamp 203 Currency 1 Total 249 Shipping Country 21 Shipping City 1376 Shipping State 52 Payment Method 106 Product Name 270 Quantity 24 dtype: int64
data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
india_country = data[data['Shipping Country'] == 'IND']
top_state = india_country[['Shipping State', 'Shipping Country']]
top_state.head()
| Shipping State | Shipping Country | |
|---|---|---|
| 0 | IN-MH | IND |
| 1 | IN-MH | IND |
| 2 | IN-MH | IND |
| 3 | IN-MH | IND |
| 4 | IN-DL | IND |
top_state['Count States'] = india_country.groupby(['Shipping State'])['Order No'].transform('count')
top_state.head()
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\4150116577.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
top_state['Count States'] = india_country.groupby(['Shipping State'])['Order No'].transform('count')
| Shipping State | Shipping Country | Count States | |
|---|---|---|---|
| 0 | IN-MH | IND | 379 |
| 1 | IN-MH | IND | 379 |
| 2 | IN-MH | IND | 379 |
| 3 | IN-MH | IND | 379 |
| 4 | IN-DL | IND | 129 |
top_state.drop(['Shipping Country'], axis=1, inplace=True)
top_state.columns
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\4002702850.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy top_state.drop(['Shipping Country'], axis=1, inplace=True)
Index(['Shipping State', 'Count States'], dtype='object')
top_state['Count States'].astype('int64')
0 379
1 379
2 379
3 379
4 129
...
3034 155
3035 80
3036 80
3037 155
3038 155
Name: Count States, Length: 2999, dtype: int64
top_10_states = top_state.groupby(['Shipping State'], as_index=False)['Count States'].median().sort_values(by='Count States', ascending=False)
top_10_states = top_10_states.head(10)
top_10_states.reset_index(inplace=True, drop=True)
top_10_states.head()
| Shipping State | Count States | |
|---|---|---|
| 0 | IN-MH | 379.0 |
| 1 | IN-TN | 356.0 |
| 2 | IN-UP | 289.0 |
| 3 | IN-KA | 205.0 |
| 4 | IN-AP | 178.0 |
top_10_states
| Shipping State | Count States | |
|---|---|---|
| 0 | IN-MH | 379.0 |
| 1 | IN-TN | 356.0 |
| 2 | IN-UP | 289.0 |
| 3 | IN-KA | 205.0 |
| 4 | IN-AP | 178.0 |
| 5 | IN-TG | 159.0 |
| 6 | IN-AN | 155.0 |
| 7 | IN-DL | 129.0 |
| 8 | IN-BR | 123.0 |
| 9 | IN-WB | 120.0 |
Replacing the values of [Shipping State] to the understandable format
top_10_states['Shipping State'] = top_10_states['Shipping State'].replace(['IN-MH', 'IN-TN', 'IN-UP', 'IN-KA', 'IN-AP', 'IN-TG', 'IN-AN', 'IN-DL', 'IN-BR', 'IN-WB'],
['Maharashtra', 'Tamil Nadu', 'Uttar Pradesh', 'Karnataka', 'Andhra Pradesh', 'Telangana', 'Andaman and Nicobar Islands',
'Delhi', 'Bihar', 'West Bengal'])
top_10_states.head()
| Shipping State | Count States | |
|---|---|---|
| 0 | Maharashtra | 379.0 |
| 1 | Tamil Nadu | 356.0 |
| 2 | Uttar Pradesh | 289.0 |
| 3 | Karnataka | 205.0 |
| 4 | Andhra Pradesh | 178.0 |
Visualizing the Results of 'Sales By States'
state_plot = px.bar(top_10_states, x='Shipping State', y='Count States', title='Top 10 Number of Sales By States',
labels={'Shipping State':'State', 'Count States':'Count of Orders'}, text_auto=True, color='Shipping State')
state_plot.show()
Filtering the non-nan values in the dataset.
year_sale_data = data[data['DateTime Stamp'].notnull()]
year_sale_data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 15 | R679506806 | 23-09-2021 18:09:27 +0530 | INR | 1000 | IND | VARANASI | IN-UP | NaN | ONLINE TUITION FOR STD 4 and STD 5(FREE 5 DAYS... | 1 |
| 18 | R444302271 | 23-09-2021 18:39:07 +0530 | INR | 499 | IND | Delhi | IN-DL | NaN | Fun Activities for Kids LKG to STD 10(Singing,... | 1 |
| 21 | R181106661 | 23-09-2021 18:39:50 +0530 | INR | 499 | IND | Delhi | IN-DL | NaN | Fun Activities for Kids LKG to STD 10(Singing,... | 1 |
| 254 | R798254987 | 27-04-2021 15:08:07 +0530 | INR | 321 | IND | Berhampur | IN-OR | NaN | Mutton Curry Cut 500gm (Berhampur) | 1 |
| 292 | R299286513 | 05-04-2021 22:50:28 +0530 | INR | 999 | IND | Hyderabad | IN-AP | NaN | REALME BUDS PLUS Wireless Bluetooth Ears Buds ... | 1 |
year_sale_data.shape
(238, 10)
year_sale_data.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 238 entries, 15 to 3038 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order No 238 non-null object 1 DateTime Stamp 238 non-null object 2 Currency 238 non-null object 3 Total 238 non-null object 4 Shipping Country 238 non-null object 5 Shipping City 237 non-null object 6 Shipping State 238 non-null object 7 Payment Method 233 non-null object 8 Product Name 238 non-null object 9 Quantity 238 non-null int64 dtypes: int64(1), object(9) memory usage: 20.5+ KB
year_sale_data['DateTime Stamp'] = year_sale_data['DateTime Stamp'].astype('datetime64[ns]')
year_sale_data.dtypes
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\2800283185.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Order No object DateTime Stamp datetime64[ns] Currency object Total object Shipping Country object Shipping City object Shipping State object Payment Method object Product Name object Quantity int64 dtype: object
year_sale_data['Total']=year_sale_data['Total'].astype('float64')
year_sale_data['Total'] = year_sale_data['Total'].astype('int64')
year_sale_data.dtypes
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\1181335453.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\1181335453.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Order No object DateTime Stamp datetime64[ns] Currency object Total int64 Shipping Country object Shipping City object Shipping State object Payment Method object Product Name object Quantity int64 dtype: object
year_sale_data['Year'] = pd.to_datetime(year_sale_data['DateTime Stamp']).dt.year
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\759780284.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Handling the dupicate values for better results.
year_sale_data.duplicated().sum()
2
year_sale_data[year_sale_data.duplicated()]
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2558 | R754403637 | 2019-03-28 19:29:45 | INR | 44970 | IND | Varanasi | IN-UP | Offline Payment ₹44,970.00 | Vmax HX 750 Remote Control Flying Drone No Camera | 15 | 2019 |
| 2614 | R446418996 | 2019-05-31 20:03:48 | INR | 46166 | IND | Varanasi | IN-UP | Offline Payment ₹46,166.00 | Vmax HX 750 Remote Control Flying Drone No Camera | 5 | 2019 |
year_sale_data.drop_duplicates(inplace=True)
year_sale_data.duplicated().sum()
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\2456456962.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
0
year_sale_data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | Year | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 15 | R679506806 | 2021-09-23 12:39:27 | INR | 1000 | IND | VARANASI | IN-UP | NaN | ONLINE TUITION FOR STD 4 and STD 5(FREE 5 DAYS... | 1 | 2021 |
| 18 | R444302271 | 2021-09-23 13:09:07 | INR | 499 | IND | Delhi | IN-DL | NaN | Fun Activities for Kids LKG to STD 10(Singing,... | 1 | 2021 |
| 21 | R181106661 | 2021-09-23 13:09:50 | INR | 499 | IND | Delhi | IN-DL | NaN | Fun Activities for Kids LKG to STD 10(Singing,... | 1 | 2021 |
| 254 | R798254987 | 2021-04-27 09:38:07 | INR | 321 | IND | Berhampur | IN-OR | NaN | Mutton Curry Cut 500gm (Berhampur) | 1 | 2021 |
| 292 | R299286513 | 2021-05-04 17:20:28 | INR | 999 | IND | Hyderabad | IN-AP | NaN | REALME BUDS PLUS Wireless Bluetooth Ears Buds ... | 1 | 2021 |
yearly_sale = year_sale_data.groupby(['Year'], as_index=False)['Total'].mean()
yearly_sale
| Year | Total | |
|---|---|---|
| 0 | 2016 | 707.142857 |
| 1 | 2017 | 1123.000000 |
| 2 | 2018 | 9163.448819 |
| 3 | 2019 | 6571.983333 |
| 4 | 2020 | 1419.833333 |
| 5 | 2021 | 2234.250000 |
yearly_sale['Total'] = yearly_sale['Total'].round(1)
yearly_sale
| Year | Total | |
|---|---|---|
| 0 | 2016 | 707.1 |
| 1 | 2017 | 1123.0 |
| 2 | 2018 | 9163.4 |
| 3 | 2019 | 6572.0 |
| 4 | 2020 | 1419.8 |
| 5 | 2021 | 2234.2 |
Showing the results through visualization
year_fig = px.line(data_frame=yearly_sale, x='Year', y='Total', title='Average Sale by Year', labels={'Total':'Average Sale'},
text='Total', markers=True)
year_fig.show()
data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
pay_method_data= data[data['Payment Method'].notnull()]
pay_method_data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 26 | R113535804 | NaN | INR | 999 | IND | Dombivali | IN-MH | CCAvenue ₹999.00 | Lava Captain N1 Red | 1 |
| 137 | R913187467 | NaN | INR | 219 | IND | Kakinada | IN-AP | CCAvenue ₹219.00 | Fresh Mushrooms Button 200gm (Berhampur) | 1 |
| 138 | R204461089 | NaN | INR | 1499 | IND | Dindigul | IN-TN | CCAvenue ₹1,499.00 | Vmax HX 750 Quadcopter Drone (No Camera) | 1 |
| 146 | R602350202 | NaN | INR | 349 | IND | Bengaluru | IN-KA | CCAvenue ₹349.00 | Boat Super EXTRA BASS HEADS 225 WIRED WITH MIC... | 1 |
| 147 | R760464983 | NaN | INR | 799 | IND | NOIDA | IN-UP | CCAvenue ₹799.00 | Solanki King Car Bugatti Style Steering Radio ... | 1 |
pay_method_data['Payment Method'].unique()
array(['CCAvenue ₹999.00', 'CCAvenue ₹219.00', 'CCAvenue ₹1,499.00',
'CCAvenue ₹349.00', 'CCAvenue ₹799.00', 'CCAvenue ₹150.00',
'CCAvenue ₹269.00', 'CCAvenue ₹399.00',
'Offline Payment ₹8,999.00', 'Offline Payment ₹799.00',
'Offline Payment ₹8,999.00;Offline Payment -₹8,999.00',
'CCAvenue ₹1,798.00', 'CCAvenue ₹1,299.00',
'Offline Payment ₹2,299.00', 'CCAvenue ₹400.00',
'CCAvenue ₹499.00', 'CCAvenue ₹140.00', 'CCAvenue ₹1,700.00',
'CCAvenue ₹1,599.00', 'CCAvenue ₹6,999.00',
'Offline Payment ₹1,099.00', 'Offline Payment ₹1,999.00',
'Offline Payment ₹299.00', 'Offline Payment ₹549.00',
'Offline Payment ₹5,999.00',
'Offline Payment ₹1,099.00;Offline Payment -₹1,099.00',
'CCAvenue ₹1,099.00', 'Offline Payment ₹349.00',
'Offline Payment ₹199.00', 'Offline Payment ₹400.00',
'Offline Payment ₹1,199.00', 'Offline Payment ₹259.00',
'Offline Payment ₹5,097.00', 'Offline Payment ₹230.00',
'Offline Payment ₹4,369.05', 'Offline Payment ₹849.00',
'Offline Payment ₹1,399.00;Offline Payment -₹1,399.00',
'Offline Payment ₹699.00;Offline Payment -₹699.00',
'CCAvenue ₹2,099.00', 'CCAvenue ₹1,596.00', 'CCAvenue ₹1,399.00',
'Offline Payment ₹1,699.00', 'Offline Payment ₹800.00',
'Offline Payment ₹1,399.00', 'CCAvenue ₹2,199.00',
'Offline Payment ₹999.00', 'Offline Payment ₹499.00',
'Offline Payment ₹1,499.00', 'Offline Payment ₹44,970.00',
'Offline Payment ₹2,199.00',
'CCAvenue ₹1,499.00;Offline Payment -₹1,499.00',
'Offline Payment ₹200.00', 'Offline Payment ₹46,166.00',
'Offline Payment ₹43,168.00', 'Offline Payment ₹2,099.00',
'Offline Payment ₹449.00', 'Offline Payment ₹35,976.00',
'Offline Payment ₹2,798.00', 'Offline Payment ₹37,972.00',
'Offline Payment ₹5,196.00', 'Offline Payment ₹5,399.00',
'Offline Payment ₹3,298.00', 'Offline Payment ₹399.00',
'Offline Payment ₹17,578.00', 'Offline Payment ₹19,900.00',
'Offline Payment ₹35,774.00', 'Offline Payment ₹599.00',
'Offline Payment ₹369.00', 'Offline Payment ₹19,176.00',
'Offline Payment ₹19,576.00', 'Offline Payment ₹9,950.00',
'Offline Payment ₹13,990.00', 'Offline Payment ₹22,064.67',
'Offline Payment ₹38,519.00', 'Offline Payment ₹10,198.00',
'Offline Payment ₹22,980.00', 'Offline Payment ₹18,995.00',
'Offline Payment ₹22,995.00', 'Offline Payment ₹340.00',
'Offline Payment ₹899.00', 'Offline Payment ₹15,597.00',
'Offline Payment ₹22,485.00', 'CCAvenue ₹20,238.75',
'CCAvenue ₹299.00', 'CCAvenue ₹1,799.00',
'Offline Payment ₹10,398.00', 'Offline Payment ₹269.00',
'Offline Payment ₹1,095.00', 'Offline Payment ₹4,599.00',
'CCAvenue ₹749.00', 'Offline Payment ₹10,500.00',
'Offline Payment ₹3,399.00', 'Offline Payment ₹1,050.00',
'Offline Payment ₹300.00', 'Offline Payment ₹249.00',
'Offline Payment ₹522.50', 'Offline Payment ₹1,166.10',
'Offline Payment ₹250.00', 'CCAvenue ₹100.00',
'Offline Payment ₹12,500.00', 'Offline Payment ₹500.00',
'CCAvenue ₹300.00', 'CCAvenue ₹800.00', 'Offline Payment ₹100.00',
'Offline Payment ₹150.00', 'Offline Payment ₹2,000.00'],
dtype=object)
We can see that her is two type of payment method and values are mixed up with price
Using the where function of numpy to change the values.
pay_method_data['Payment Method'] = np.where(pay_method_data['Payment Method'].str.contains('CCAven'), "CCAvenue", "Offline Payment")
C:\Users\Admin\AppData\Local\Temp\ipykernel_10540\3840913070.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
pay_method_data['Payment Method'].unique()
array(['CCAvenue', 'Offline Payment'], dtype=object)
Handling Duplicates in dataset
pay_method_data.duplicated().sum()
2
pay_method_data.drop_duplicates()
pay_method_data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 26 | R113535804 | NaN | INR | 999 | IND | Dombivali | IN-MH | CCAvenue | Lava Captain N1 Red | 1 |
| 137 | R913187467 | NaN | INR | 219 | IND | Kakinada | IN-AP | CCAvenue | Fresh Mushrooms Button 200gm (Berhampur) | 1 |
| 138 | R204461089 | NaN | INR | 1499 | IND | Dindigul | IN-TN | CCAvenue | Vmax HX 750 Quadcopter Drone (No Camera) | 1 |
| 146 | R602350202 | NaN | INR | 349 | IND | Bengaluru | IN-KA | CCAvenue | Boat Super EXTRA BASS HEADS 225 WIRED WITH MIC... | 1 |
| 147 | R760464983 | NaN | INR | 799 | IND | NOIDA | IN-UP | CCAvenue | Solanki King Car Bugatti Style Steering Radio ... | 1 |
count_method = pay_method_data.groupby(['Payment Method'], as_index=False)['Order No'].count()
count_method
| Payment Method | Order No | |
|---|---|---|
| 0 | CCAvenue | 60 |
| 1 | Offline Payment | 199 |
payment_fig = px.bar(count_method, x='Payment Method', y='Order No', color="Payment Method", title='Different Payment Methods',
text_auto=True, labels={'Order No':'Number of Orders'})
payment_fig.show()
data.isnull().sum()
Order No 0 DateTime Stamp 2801 Currency 0 Total 0 Shipping Country 0 Shipping City 19 Shipping State 22 Payment Method 2780 Product Name 0 Quantity 0 dtype: int64
def new_func():
return data['Product Name'].unique()
print(list[new_func()])
list[array(['Falcon Drone Four Axis Aircraft with 2.4 GHz RC, Blade Guard, Headless Mode LED Without Camera',
'Barbie Doll (pink)',
'Sony MDR-G45LP On-Ear Street Wired Headphone',
'Reliance Jio Phone 2',
'QUECHUA Ultra Compact Travel 10 Litre Backpack (Blue)',
'Lava Captain N1 Red',
'Zebronics ZEB-16A 16 inch (39.6 cm) LED Monitor Full HD',
'Spider Man Mini Drone',
'Candytech SPEEDO Dual USB Charger with 2.6 Amp Power',
'ONLINE TUITION FOR STD 4 and STD 5(FREE 5 DAYS DEMO CBSE)',
'Samsung Earphones With Mic Tuned by AKG (Black)',
'Vmax HX 750 Quadcopter Drone (No Camera)',
'Fun Activities for Kids LKG to STD 10(Singing, Dancing, Drawing, Craft, Music, Yoga, Karate)',
'JBL A005 Portable Led Lightning Wireless Bluetooth Speaker',
'Quarter Chicken Biryani (Chennai)', 'Boat BASS HEADS 152 (Black)',
'Fresh Paneer 500gm (Berhampur)',
'boAT Airdopes X3 Bass Wireless Earbuds with Charging Case (White)',
'1KG Chicken Biryani (Chennai)',
'boAt BassHeads 229 Metal Earphones Hifi Stereo Headsets Metal Headphones With Mic',
'Drone Workshop Training Program',
'The Amazing Spider Man Micro Drone Q Series Hyun Lights Upgraded Quadcopter Headless Mode One Key Features',
'Super Power JCB Truck Construction Loader Excavator Crane Toys',
'Samsung U Flex Wireless Bluetooth Flexible Headphones with Mic (Black)',
'REALME BUDS PLUS Wireless Bluetooth Ears Buds Headphone',
'JBL Tempo On-Ear Headphone',
'Nova NHC-3791 Electric Hair Trimmer (Black)',
'Barbie Doll Toy Combo pack(2 Pack)', 'Reliance Jio Phone',
'REALME YOUTH BUDS Wireless Bluetooth Headphone',
'Zebronics ZEB-A19 19-inch LED Monitor',
'Refer and Earn Rs.2000 and Get Free Partnership Gift',
'Mutton Curry Cut 500gm (Berhampur)', 'Hot Wheels Car 5 Gift Pack',
'Puppy House Coin Piggy Bank', 'Summer Internship Gift',
'QUECHUA Mini Laptop Bag 10 Liter- Red/Black',
'Zebronics Radiant Multimedia Gaming Keyboard',
'DIGISonic DS1601 42cm (16) FULL HD LED Television',
'Boat Nirvanaa Uno Wired In-ear Earphones with Mic (Black)',
'SONY PlayStation PS4 PRO 1TB Hard Disk(Black)',
'1 Bucket Chicken Biryani Party Pack 5kg (Chennai)',
'OnePlus Bullets Wireless 3 Bluetooth Headphone(Black)',
'REALME Buds Air Tone Wireless Headphone',
'Preethi Zodiac MG 218 750-Watt Mixer Grinder with 5 Jars Black',
'ONLINE TUTION FOR CLASS- NURSERY,LKG,UKG(FREE 5 DAYS DEMO CLASS)',
'Chhota Bheem Remote Control Car',
'Skullcandy Earphone With Pouch Bag', 'Toy Family Doll House Set',
'Boat Rockerz 225 Plastic Wireless Bluetooth Headset with Mic (Black)',
'Zebronics ZEBVR100 Virtual Reality Kit VR box 3D 360 Degree',
'King Driver Remote Control Car',
'HX-715 Flying Helicopter With Remote Control Toy',
'Zebronics SAGA Portable Bluetooth wireless speaker',
'Boat BassHeads 250 with Hifi Sound Effect, in-line mic, Clear Human Voice,Noise Isolation With Precise Bass Wired Earphones (Black)',
'Berhampur Special Masala Papad 250gm',
'Fresh Mushrooms Button 200gm (Berhampur)',
'Country Chicken (Desi Murga) Curry Cut with skin 500gm(Berhampur)',
'Men Formal Shoes Black Color With Free Sunglasses',
'Mutton Curry Cut 1kg (Berhampur)',
'Boat Super EXTRA BASS HEADS 225 WIRED WITH MIC (Black)',
'Solanki King Car Bugatti Style Steering Radio Control Rechargeable Car',
'WS887 Mini Wireless Bluetooth Speaker',
'Sony PlayStation PS2 Gaming Console 150 GB Hard Disk With 50 Games Preloaded(Black)',
'Boat Rockerz 530 Foldable Bluetooth Headphone with Mic (Carbon Black)',
'boAt BassHeads 225 Wired Headphones with Mic and Carrying Case (Black)',
'VMax HX763 Vision Drone 2.4GHz RC Quad-copter Headless Mode One Key Without Camera',
'Realme Buds Wireless 2 Bluetooth In the Ear Headphone (Black)',
'SanDisk 16 GB memory card',
'ONLINE TUITION for CLASS 6TH TO 8TH (FREE 5 DAYS DEMO CLASS)',
'Samsung Level U Bluetooth Wireless in-Ear Headphones With Mic (Black)',
'Mi Wired Headphones with Mic Ultra-Deep Bass (Black)',
'Avengers Captain America Drone Four Axis Aircraft with 2.4 GHz Without Camera',
'Country Chicken (Desi Murga) raw with skin 1kg(Berhampur)',
'Candytech H111i Supersonic Bass earphone Wired Headset With Mic',
'LED TV Installation Uninstall Repair Services',
'Fresh Cow Milk 1Liter(Berhampur)',
'Kids Drone Quadcopter 2.4G 6-Channel Without Camera',
'Lg Tone Pro Hbs-750 Bluetooth Headset',
'Little Chef Kitchen Set With Convertible Suitcase',
'Vakura Catla River Fish Cutting(No Head) 1Kg(Berhampur)',
'Mutton Chops Special(kashi) 500gm(Berhampur)',
'Combo pack Toys Savoir Robot and Barbie doll',
'Realme Buds Wireless BlueTooth Headset(Black)',
'Business Analyst Internship Training Program',
'REALME Buds Wireless Bluetooth Headphone',
"KING'S CLASSIC INSTANT COFFEE",
'FERNWEH ORIGINALS INSTANT COFFEE', 'Minions Mini Drone',
'Diabolo Captain America Civil War Q Series Hyun Lights Upgraded Mini Drone Without Camera',
'Sony EX31BN Noise-Cancelling Bluetooth® In-ear Headphones',
'HR and Finance Dual Internship Training Course',
'Yoshops VR BOX Virtual Reality Glasses Headset 2.0 View Suitable For 4-6 Inch Smartphones',
'Boat Rockerz 255 Wireless Bluetooth Headset with Mic (Black)',
'Sony PlayStation PS3 Console Slim 320 GB (Black)',
'Boat BassHeads 160 with Mic Wired Stereo Headset (Black)',
'Samsung Galaxy M01 Core 1GB RAM with 16GB Storage (Black,Blue,Red)',
'YouTuber Prabeen Kumar -YouTube Channel-3 Subscriber-504 Instagram Follower-3k',
'Digital Marketing Internship Training Program',
'Artificial Intelligence and Machine learning Internship Training Program',
'Kasi yatra set silk cotton fabric decorated with rich kundans, brooches and laces',
'MitSonic 20 Inches (53 cm) FULL HD Gorilla Glass LED TV',
'Sony PlayStation PS2 Gaming Console PS2 150 GB Hard Disk With 50 Games Preloaded(Black)',
'Boat Rockerz 530 Bluetooth Headphone with Mic (Carbon Black)',
'Web Design & Development Internship Training Program',
'Vmax Voyager HX756 Drones Without Camera',
'Data Science Internship Training Program',
'Zebronics External TV Tuner Card TL1010',
'Flying Avengers Hero Induction Control helicopter',
'QUECHUA Ultra-Compact Backpack10-Litre - Black',
'HX770 V-Max Aircraft Drone',
'Preethi Armour 1.0L Electric Kettle',
'Avengers Captain America Drone Four Axis Aircraft with 2.4 GHz RC, Blade Guard, Headless Mode LED Without Camera',
'Zebronics ZEB-U740 UPS 600VA',
'Avengers Captain America Four Axis Aircraft Drone with 2.4 GHz RC, Blade Guard, Headless Mode LED Without Camera',
'Toy Gundam Robot with Gun (Red)',
'Vogue Girl Beautiful Fashion Doll',
'Sony On-Ear Headphone With Mic – MS177 (Black)',
'REALME RMA 950 BT EXTRA BASS Wireless Bluetooth Headphone',
'Realme Buds 3 Wired EARPHONE(Black)',
'Mi Neckband Bluetooth Headset with Mic (Black)',
'JBL Tempo On-Ear Headphone With Mic – J55iB (Black)',
'Realme Buds Wireless BT-R3 BlueTooth Headphone(Black)',
'JBL SH12 WIRELESS Bluetooth HEADPHONE',
'SYSKA Wireless EARPHONES H-15 BlueTooth Headphone(Black)',
'HX-713 Remote Control Helicopter',
'Samsung Galaxy M01 Core 2GB RAM with 32GB Storage (Black,Blue,Red)',
'Casio HR-100RC Printing Calculator',
'ZEBRONICS All-In-One USB Card Reader',
'Preethi STEELE SUPREME 750W 4 JAR MIXER GRINDER',
'SAMSUNG Travel Adapter White Battery Charger',
'Zebronics 18.5 inch Full HD LED Monitor',
'Boat Rockerz 335 STEREO EARPHONES (Black/Gold)',
'S52 Durable King Drone No Camera',
'Zebion Ergo PS2 USB Keyboard (Black and Red Keys)',
'HX708 Remote Control Helicopter',
'Ui Smart 5605N keypad phone Dual Sim(Black)',
'OnePlus Bullets Wireless3 BlueTooth Headphone(Black)',
'QUECHUA Sleeping Bag Arpenaz 20°C - Blue',
'Boat BassHeads 225 Super Extra Bass Headphone (Black)',
'Zebronics Card Reader VR BOX Special Combo',
'Preethi Trendy Plus Induction Cooktop', 'Wedding Doll Toy',
'JBL SH12 WIRELESS HEADPHONE', 'Cash On Delivery Services',
'Preethi Drip cafe Coffee Maker',
'Realme R-260 WIRE LES HEADPHONES',
'Dell KB216 Multimedia USB Wired Keyboard',
'Drone Quadcopter 2.4G 6-Channel Without Camera',
'Casio MJ-12SA Desktop Calculator -Black',
'Boat Rockerz 225 Metal Wireless Bluetooth Headset with Mic (Black)',
'Combo Meal Chicken Biriyani (Chennai)',
'BOROLINE ANTISEPTIC CREAM Combo',
'MI 10000mAH Li-Polymer Power Bank (Black)',
'Magic Tracks Bend Flex & Glow Racetrack with LED Flashing Race Cars Battery Operated',
'Ultra Thin Keyboard Cover for Laptop Keyboard',
'Laptop Spare Parts',
'Preethi X Pro Duo 1300 Watts Mixer Grinder Color(Purple)',
'Chicken Pakora-200Gm (Chennai)',
'Sales and Marketing Internship Training Course', 'Zebion Gamepad',
'Sony PlayStation 3 Console Slim 320 GB (Black)',
'QUECHUA Travel Multi-Compartment Pouch - Brown',
'Mobile Spare Part',
'Sony PlayStation PS2 with in-built DVD Player (Black)',
'Cheetah Construction Truck with Flash Excavator',
'Sony PlayStation 2 Gaming Console PS2 150 GB Hard Disk With 50 Games Preloaded(Black)',
'Zebronics H-100HM Headphone with Mic',
'Navigator Quadcopter Remote Control Drone',
'QY66 D1 Drone - 6 Axis Gyro RC Quadcopter - No camera',
'Sony PS4 Slim 1TB Console 1000 GB (Black)',
'Combo Set of 6 Pull Back Toy Car Small',
'3 Wheeled Scooter For Kids',
'Digital Signature Certificate (DSC) Class 2 valid for 2 Years with USB Device',
'Preethi Essence Juicer', 'Celkon C107 Black',
'Nokia BH-103 Bluetooth Stereo Headset with Mic',
'Mitsun MIT2210 20 Inches (51cm) HD Ready Ultra Slim Gorilla Glass LED TV',
'Mitsonic 24 Inches (60 cm) Full HD Gorilla Glass LED TV',
'Sony PlayStation 2 Gaming Console PS2 150 GB Hard Disk With 50 Games(Black)',
'DOMYOS S500 Boys Gym Sports Half-Sleeved T-Shirt (Grey)',
'Sony PlayStation 2 Console(PS2 150 GB) With 50 Games',
'KIPSTA Light Sports Bag 15 Litres - Blue',
'Captain America 2.4GHz RC Quad-copter Drone',
'Vmax Voyager HX756 Drones', 'Barbie Doll Toy',
'VMax HX763 Vision Drone 2.4GHz RC Quad-copter Headless Mode One Key Features 6 Axis Stabilization System Gyro 4 Channel',
'The Amazing Spider Man Q series 2.4GHz RC Quad-copter Drone',
'QUECHUA Laptop Bag', 'VMax HX763 Vision Drone',
'VR BOX Virtual Reality Glasses Headset 2.0',
'Zebronics ZEB-VR Virtual Reality Headset VR BOX',
'Bluedio i4 Stereo Bluetooth Earphone',
'Sony PlayStation 2 Console(PS2) With 50 Games',
'CHOTTA BHEEM Mini English learning laptop',
'Zebronics ZEB-EM750 Headphone with Mic',
'Vmax HX 750 Remote Control Flying Drone No Camera',
'JBL EARPHONE POUCH BLACK',
'MitSonic 16 inches (40cm) FULL HD Gorilla Glass LED TV',
'Y0-100 Wireless Mouse',
'Sony MDR-EX250AP In-Ear Headphones with Mic (Black)',
'Zebronics BH330 Bluetooth Headphones',
'Train With Track light music battery operated train toy',
'Bulldozer Hercules Power Driving Super Truck (Yellow)',
'Inteligent cute white tom cat with touch recording story & music',
'Super Aeroplanist Power Aeroplane Wired Rotating Plane',
'Home Nursing Service', 'Chicken Biryani Plate(Chennai)',
'YoShops Multi Pin Cable 4in1',
'HX 750 Remote Control Flying Drone No Camera',
'VMax Vision HX763 Drone Remote Control Quadrocopter',
'1KG Chicken Biryani', 'Funny Bricks and Blocks Set',
'MitSonic 16 inches (40cm) FULL HD LED TV',
'MitSonic 20 Inches (53 cm) FULL HD LED TV',
'MitSonic 16 inches (40cm) LED TV',
'Bose Earphone With Pouch Black',
'MitSonic MT2001 20 Inches (53cm) FULL HD LED Television',
'HX 750 Remote Control Flying Drone', 'Mushrooms Button 250 gm',
'Auto ePass 2003 FIPS USB Token',
'BOSE Sound True SQ2 Headphones (White,Black,Red)',
'Casio DR-140TM Printing Calculator',
'Sandisk 16 GB Utility Pendrive -Black',
'Preethi SPICE 550W 3 JAR MIXER GRINDER',
'PREETHI GALAXY 750 WATTS MIXER GRINDER (PINK)',
'MitSonic MT2001 53cm (20) FULL HD LED Television',
'Sony AS210 Sports In-ear Headphones',
'Flying Helicopter With Remote Control Toy',
'Zebronics Blue Bird Bluetooth Earphones',
'JBL C100SI Wired Headphones (Black,Red,White)',
'Bubly Belly Matic SS Pressure Cooker', 'Barbie Doll',
'VR BOX 2.0',
'Samsung Universal Mobile Charger USB Power Wall Adapter',
'Wedding Dolls', 'Flying Helicopter With Remote Control',
'BOSE Sound True SQ2 Headphones (White,Black,Red)',
'SAMSUNG Travel Adapter EP-TA13IWEUGIN White Battery Charger (White)',
'Zebion Ergo Wireless Slim Fit G1600 Keyboard and Mouse',
'Organic Raw Honey 250Gms',
'YO Earpods With Mic Compatible with Apple Phone',
'Yo145 OTG Cable ', 'Rosra Black Analog Watch',
'QUECHUA Laptop Bag',
'Wireless Mouse,Multi Charger, Universal Adaptor,Head Phone with Mic,Laptop Led Light ',
'Mutton Biriyani', '1KG Chicken Biryani',
'Yo 400 Wire Less Headphones (Bluetooth )',
' Zebronics Wireless Optical Mouse', 'Movers Packer Service',
'Computer Repair Services', 'Micromax Canvas Lapbook',
'Designer Gown By ZARA', 'Zebronics 2400 mAh Power Bank - White',
'Philip SHQ2305WS Wired Headphones (Black,White)',
'JBL C100SI Wired Headphones (Black,Red,White,Blue)',
'JBL C100SI Wired Headphones (Black, In the Ear)',
'I Kall K-66 White',
'Philip SHQ2305WS Wired Headphones (Black, In the Ear)',
'Bose Sound True SQ2 Noise Cancelling Headphones ',
'YO- Earphones With Mic White',
'Bose Quiet Comfort 20 Acoustic Noise Cancelling Headphones (Black) ',
'AmbranePowerBank P-1310',
'Lenovo IdeaPad 100S-11 11.6-inch Laptop ',
'iBall Excelance CompBook 11.6-inch Laptop',
'Ear Wired Earphones With Mic White',
'Electric Steam Cooker Maestro MC1',
'AmbranePowerBank P-1111(white,Blue)', 'Samsung-Metro-350-White'],
dtype=object)]
product_sale = data.copy()
product_sale.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3039 entries, 0 to 3038 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order No 3039 non-null object 1 DateTime Stamp 238 non-null object 2 Currency 3039 non-null object 3 Total 3039 non-null object 4 Shipping Country 3039 non-null object 5 Shipping City 3020 non-null object 6 Shipping State 3017 non-null object 7 Payment Method 259 non-null object 8 Product Name 3039 non-null object 9 Quantity 3039 non-null int64 dtypes: int64(1), object(9) memory usage: 237.5+ KB
Handling the duplicate values in the dataset
product_sale.duplicated().sum()
3
product_sale[product_sale.duplicated()]
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 2558 | R754403637 | 29-03-2019 00:59:45 +0530 | INR | 44970 | IND | Varanasi | IN-UP | Offline Payment ₹44,970.00 | Vmax HX 750 Remote Control Flying Drone No Camera | 15 |
| 2614 | R446418996 | 06-01-2019 01:33:48 +0530 | INR | 46166 | IND | Varanasi | IN-UP | Offline Payment ₹46,166.00 | Vmax HX 750 Remote Control Flying Drone No Camera | 5 |
| 2714 | R754510307 | NaN | INR | 36768 | IND | Varanasi | IN-UP | NaN | HX 750 Remote Control Flying Drone No Camera | 8 |
product_sale.drop_duplicates(inplace=True)
product_sale.dtypes
Order No object DateTime Stamp object Currency object Total object Shipping Country object Shipping City object Shipping State object Payment Method object Product Name object Quantity int64 dtype: object
product_sale['Total'].unique()
array(['2299', '349', '899', '3199', '598', '999', '3499', '1499', '299',
'1000', '499', '63', '169', '150', '399', '1999', '1299', '599',
'699', '300', '1899', '799', '4999', '1', '379', '4399', '549',
'31999', '7800', '400', '369', '1199', '559', '199', '792', '219',
'279', '1798', '8999', '450', '469', '1300', '2798', '1399', '130',
'2999', '269', '429', '321', '949', '389', '584', '49', '798',
'3495', '19999', '5499', '2199', '20', '0', '3198', '5949', '2',
'3000', '7999', '21', '7998', '1449', '280', '7949', '140', '1699',
'669', '1098', '698', '1398', '1598', '101', '649', '749', '6499',
'2495', '160', '14098', '19949', '14949', '3999', '11997', '2249',
'449', '1149', '149', '350', '800', '5999', '1249', '1998', '2049',
'750', '7,18,281.00', '1649', '498', '3498', '330', '1748', '897',
'1698', '30870', '3449', '1548', '1700', '1650', '3149', '3350',
'13600', '34000', '3400', '17000', '6398', '8500', '14999',
'1,69,950.00', '4497', '29998', '7299', '80', '2400.2', '420',
'2100', '9360', '2397', '9597', '99', '1600', '6999', '8990',
'398', '345', '1599', '1992', '24999', '6949', '3298', '1949',
'600', '4949', '258', '16490', '2949', '6799', '13998', '1099',
'6199', '9197', '1467', '849', '2500', '126', '259', '100', '5097',
'230', '340', '4369.05', '50', '2099', '1596', '500', '380',
'44970', '200', '46166', '260', '43168', '35976', '37972', '5196',
'5399', '5199', '17578', '19900', '35774', '2399', '19176',
'19576', '9950', '1168', '2097', '13990', '22064.67', '36768',
'38519', '2998', '780', '10198', '22980', '18995', '22995',
'15597', '22485', '20238.75', '1960.91', '1799', '10398', '1549',
'70', '1095', '1550', '4599', '10500', '12600', '2799', '3399',
'1050', '790', '3799', '2499', '629', '250', '249', '522.5',
'1166.1', '2994', '569', '47799', '2700', '1879', '6750', '6000',
'1995', '650', '9500', '700', '5590', '570', '760', '12500', '285',
'9,49,050.00', '14000', '45000', '9000', '49950', '4650', '18000',
'1800', '2000', '1,00,000.00', '4000'], dtype=object)
product_sale[product_sale['Total'].str.contains(',')]
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 879 | R765288033 | NaN | INR | 7,18,281.00 | IND | Bhilwara | IN-RJ | NaN | Cash On Delivery Services | 999 |
| 1434 | R078719573 | NaN | INR | 1,69,950.00 | IND | Mumbai | IN-MH | NaN | Reliance Jio Phone | 100 |
| 3002 | R121638056 | NaN | INR | 9,49,050.00 | IND | test | IN-AN | NaN | Bose Quiet Comfort 20 Acoustic Noise Cancellin... | 999 |
| 3003 | R121638056 | NaN | INR | 9,49,050.00 | IND | test | IN-AN | NaN | AmbranePowerBank P-1310 | 999 |
| 3021 | R168504447 | NaN | INR | 1,00,000.00 | IND | CHENNAI | IN-TN | NaN | Electric Steam Cooker Maestro MC1 | 50 |
Replacing the ',' with empty string and changing the datatype
product_sale['Total']=product_sale['Total'].replace('\D', '', regex=True).astype(int)
product_sale.dtypes
Order No object DateTime Stamp object Currency object Total int32 Shipping Country object Shipping City object Shipping State object Payment Method object Product Name object Quantity int64 dtype: object
product_sale = product_sale[product_sale['Shipping Country'] == 'IND']
Filtering the values which are not null in the Payment Method
product_sale = product_sale[product_sale['Payment Method'].notnull()]
product_sale.shape
(257, 10)
Changing the payment method into the right format
product_sale['Payment Method'] = np.where(product_sale['Payment Method'].str.contains('CCAven'), 'CCAvenue', 'Offline Payment')
product_sale['Payment Method'].unique()
array(['CCAvenue', 'Offline Payment'], dtype=object)
Applying the Groupby method in the required columns
top_products = product_sale.groupby(['Product Name', 'Payment Method'], as_index=False)['Total'].mean()
top_products.head()
| Product Name | Payment Method | Total | |
|---|---|---|---|
| 0 | 1KG Chicken Biryani | CCAvenue | 100.0 |
| 1 | 1KG Chicken Biryani (Chennai) | CCAvenue | 145.0 |
| 2 | Auto ePass 2003 FIPS USB Token | Offline Payment | 650.0 |
| 3 | Avengers Captain America Four Axis Aircraft Dr... | Offline Payment | 2299.0 |
| 4 | BOSE Sound True SQ2 Headphones (White,Black,Red) | CCAvenue | 300.0 |
Keeping Top 10 Products with their payment method
top_products = top_products.sort_values(by='Total', ascending=False)
top_products = top_products.head(10)
top_products
| Product Name | Payment Method | Total | |
|---|---|---|---|
| 28 | Drone Quadcopter 2.4G 6-Channel Without Camera | CCAvenue | 1.012837e+06 |
| 41 | HX770 V-Max Aircraft Drone | Offline Payment | 2.832464e+05 |
| 38 | HX-713 Remote Control Helicopter | Offline Payment | 2.327492e+05 |
| 60 | MitSonic 16 inches (40cm) FULL HD Gorilla Glas... | Offline Payment | 2.210010e+05 |
| 36 | HX 750 Remote Control Flying Drone No Camera | Offline Payment | 1.397341e+05 |
| 59 | Minions Mini Drone | Offline Payment | 3.956217e+04 |
| 42 | Hot Wheels Car 5 Gift Pack | Offline Payment | 3.851900e+04 |
| 94 | The Amazing Spider Man Q series 2.4GHz RC Quad... | Offline Payment | 3.851900e+04 |
| 90 | Spider Man Mini Drone | Offline Payment | 2.762944e+04 |
| 62 | MitSonic 16 inches (40cm) LED TV | Offline Payment | 2.299500e+04 |
top_products['Total'] = top_products['Total'].round(1)
top_products.reset_index(drop=True, inplace=True)
top_products.head()
| Product Name | Payment Method | Total | |
|---|---|---|---|
| 0 | Drone Quadcopter 2.4G 6-Channel Without Camera | CCAvenue | 1012837.0 |
| 1 | HX770 V-Max Aircraft Drone | Offline Payment | 283246.4 |
| 2 | HX-713 Remote Control Helicopter | Offline Payment | 232749.2 |
| 3 | MitSonic 16 inches (40cm) FULL HD Gorilla Glas... | Offline Payment | 221001.0 |
| 4 | HX 750 Remote Control Flying Drone No Camera | Offline Payment | 139734.1 |
Now, Visualizing the insights for better data understanding
product_fig = px.bar(top_products, x='Product Name', y='Total', color='Payment Method', labels={'Total':'Prices'},
title='Top 10 Products with Different Payment Methods', text = 'Total')
product_fig.show()
data.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
state_sale = data.copy()
state_sale.head()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
Handling the duplicates in the dataset
state_sale.duplicated().sum()
3
state_sale.drop_duplicates()
| Order No | DateTime Stamp | Currency | Total | Shipping Country | Shipping City | Shipping State | Payment Method | Product Name | Quantity | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R121113121 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 1 | R472890631 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 2 | R004476488 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 3 | R526038353 | NaN | INR | 2299 | IND | Chandrapur | IN-MH | NaN | Falcon Drone Four Axis Aircraft with 2.4 GHz R... | 1 |
| 4 | R658530771 | NaN | INR | 349 | IND | NEW DELHI | IN-DL | NaN | Barbie Doll (pink) | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3034 | R243395005 | NaN | INR | 50 | IND | Gccch | IN-AN | NaN | Ear Wired Earphones With Mic White | 1 |
| 3035 | R607209508 | NaN | INR | 500 | IND | BERHAMPUR | IN-OR | NaN | I Kall K-66 White | 1 |
| 3036 | R140316623 | NaN | INR | 4000 | IND | berhampur | IN-OR | NaN | Electric Steam Cooker Maestro MC1 | 2 |
| 3037 | R192037084 | NaN | INR | 2000 | IND | sdas | IN-AN | NaN | Electric Steam Cooker Maestro MC1 | 1 |
| 3038 | R414254148 | 28-09-2016 19:05:30 +0530 | INR | 2000 | IND | sdas | IN-AN | Offline Payment ₹2,000.00 | Electric Steam Cooker Maestro MC1 | 1 |
3036 rows × 10 columns
state_sale = state_sale[state_sale['Shipping Country'] == 'IND']
state_sale.duplicated(subset=['Order No']).sum()
67
state_sale = state_sale.drop_duplicates(subset=['Order No'])
state_sale.shape
(2932, 10)
state_sale.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2932 entries, 0 to 3038 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Order No 2932 non-null object 1 DateTime Stamp 207 non-null object 2 Currency 2932 non-null object 3 Total 2932 non-null object 4 Shipping Country 2932 non-null object 5 Shipping City 2913 non-null object 6 Shipping State 2932 non-null object 7 Payment Method 228 non-null object 8 Product Name 2932 non-null object 9 Quantity 2932 non-null int64 dtypes: int64(1), object(9) memory usage: 252.0+ KB
state_sale = state_sale[~state_sale['Total'].str.contains(',')]
state_sale.shape
(2928, 10)
Converting the 'Total' data type into int after float
state_sale['Total'] = state_sale['Total'].astype(float)
state_sale['Total'] = state_sale['Total'].astype(int)
state_sale.dtypes
Order No object DateTime Stamp object Currency object Total int32 Shipping Country object Shipping City object Shipping State object Payment Method object Product Name object Quantity int64 dtype: object
top_state_sale = state_sale.groupby(['Shipping State'], as_index=False)['Total'].mean()
top_state_sale.head()
| Shipping State | Total | |
|---|---|---|
| 0 | IN-AN | 2550.496599 |
| 1 | IN-AP | 1802.073446 |
| 2 | IN-AR | 1699.000000 |
| 3 | IN-AS | 2772.027027 |
| 4 | IN-BR | 2080.910569 |
top_state_sale.sort_values(ascending=False, by='Total', inplace=True)
top_state_sale['Total']=top_state_sale['Total'].round(1)
top_state_sale.reset_index(drop=True, inplace=True)
top_state_sale = top_state_sale.head(10)
top_state_sale
| Shipping State | Total | |
|---|---|---|
| 0 | IN-SK | 31999.0 |
| 1 | IN-NL | 5761.8 |
| 2 | IN-CH | 5260.4 |
| 3 | IN-HP | 4876.2 |
| 4 | IN-PY | 4290.9 |
| 5 | IN-UP | 3938.1 |
| 6 | IN-JK | 3078.3 |
| 7 | IN-MP | 3050.0 |
| 8 | IN-UT | 2906.8 |
| 9 | IN-AS | 2772.0 |
Replacing the states code into their standard original names
top_state_sale['Shipping State'].replace({'IN-SK':'Sikkim', 'IN-NL':'Nagaland', 'IN-CH':'Chandigarh', 'IN-HP':'Himanchal Pradesh',
'IN-PY':'Puducherry', 'IN-UP':'Uttar Pradesh', 'IN-JK':'Jammu and Kashmir', 'IN-MP':'Madhya Pradesh', 'IN-UT':'Uttarakhand',
'IN-AS':'Assam'}, inplace=True)
Visualizing the data for better understanding
px.bar(top_state_sale, x='Shipping State', y='Total', text='Total', labels={'Total':'Avg_Sale', 'Shipping State':'State'},
title='Top 10 States By Average Sale', color='Shipping State')
!jupyter-nbconvert --to html Final_Project.ipynb
[NbConvertApp] Converting notebook Final_Project.ipynb to html [NbConvertApp] Writing 679769 bytes to Final_Project.html